Abrir xlsx con los sitios de medicion

In [1]:
# Loading
library("readxl")

file <- "../xlsx/estaciones-CA.xlsx"
sheets <- c("todas", "traffic", "traffic-urban", "traffic-urban-2020", 
            "traffic-suburban", "traffic-suburban-2020", 
            "ciudades-100000", "ciudades-100000-A")
# xlsx files
sites <- read_excel(file, sheet=sheets[8])
In [ ]:
# Change categorical variables to factor
sites$Type <- as.factor(sites$Type)
sites$"Site area" <- as.factor(sites$"Site area")
In [2]:
#sites <- sites[sites$Data_end > "2020-10-01" & sites$"Población" > 150000, ]

head(sites)
A tibble: 6 × 7
MunicipioPoblaciónEstación tráficoCódigo estaciónNº estaciones EcologNº estaciones tráficoObservaciones
<chr><dbl><chr><chr><dbl><dbl><chr>
A Coruña 245711CORLAB 1 es1138a41Es Riazor o Santa Margarida, revisar. Datos sólo hasta 2018. REVISAR
Alcalá de Henares195649Alcalá de Henareses1563a01NA
Alcobendas 117040Alcobendas es1564a01NA
Alicante 334887Florida-Babel es1915a31Urban background. Not traffic
NA NAALACANT-EL PLÁ es1635a31Esta es de tráfico
Almería 198533MEDITERRÁNEO es1393a01NA
In [3]:
suppressMessages(library(openair))
suppressMessages(library(ggplot2))
suppressMessages(library(lubridate))
suppressMessages(library(gridExtra))

suppressMessages(library(repr))
options(repr.plot.width=25, 
        repr.plot.height=10,
        #repr.plot.pointsize=50,
        repr.plot.family='serif'
       )

Cargar Datos de fichero

In [ ]:
if (file.exists("../xlsx/dataAQV.csv")) {
    dataAQV <- read.csv("../xlsx/dataAQV.csv")
    
    # Convert date to datetime format
    dataAQV$date <- ymd_hms(dataAQV$date)
    dataAQV$by.day <- ymd(dataAQV$by.day)
    dataAQV$by.month <- ymd(dataAQV$by.month)

    # Change categorical variables to factor
    dataAQV$code <- as.factor(dataAQV$code)
    dataAQV$variable <- as.factor(dataAQV$variable)
    dataAQV$unit <- as.factor(dataAQV$unit)
}
head(dataAQV)

Leer Datos de OpenAir

In [4]:
dataAQV <- importEurope(
    site = sites$"Código estación"[c(1:5)],
    year=2015:2020,
    to_narrow=TRUE,
    #meta=TRUE,
)
In [5]:
# Convert date to datetime format
dataAQV$date <- ymd_hms(dataAQV$date)

# Change categorical variables to factor
dataAQV$code <- as.factor(dataAQV$code)
dataAQV$variable <- as.factor(dataAQV$variable)
dataAQV$unit <- as.factor(dataAQV$unit)

Estudio de los datos

In [6]:
pollutants <- c("no", "no2", "o3", "pm10")

for (pol in pollutants) {
    for (est in levels(dataAQV$code)) {
        if (!(pol %in% dataAQV$variable[dataAQV$code == est])
            & length(dataAQV[dataAQV$date > "2020-03-01 00:00:00" 
                            & dataAQV$code == est, ]) == 0) {
            to_Delete <- which(dataAQV$code == est)
            dataAQV <- dataAQV[-to_Delete, ]
            break
        }
    }
}
In [7]:
print(sum(is.na(dataAQV[dataAQV$variable %in% pollutants, ]$value)))

head(dataAQV)
[1] 0
A tibble: 6 × 5
datecodevariableunitvalue
<dttm><fct><fct><fct><dbl>
2015-01-01es1138aso2 ug.m-3 1.00
2015-01-01es1138apm10ug.m-348.00
2015-01-01es1138ao3 ug.m-3 3.00
2015-01-01es1138ano2 ug.m-353.00
2015-01-01es1138anox ug.m-390.00
2015-01-01es1138aco mg.m-3 0.69
In [8]:
preCOVID <- ggplot(data = dataAQV[dataAQV$date < "2020-03-01 00:00:00"
                                  & dataAQV$variable %in% pollutants
                                  , ],
                   aes(x = date, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line() + 
                facet_wrap(~code, ncol=1)

COVID <- ggplot(data = dataAQV[dataAQV$date > "2020-03-01 00:00:00"
                               & dataAQV$variable %in% pollutants
                               , ],
                   aes(x = date, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line() + 
                facet_wrap(~code, ncol=1)
    
grid.arrange(preCOVID, COVID, nrow = 1)

Resolucion de 1 dia

In [9]:
dataAQV$by.day <- date(ymd_hms(dataAQV$date))
In [10]:
group.day <- aggregate(value ~ by.day + code + variable, dataAQV, mean)

preCOVID <- ggplot(data = group.day[group.day$by.day < "2020-03-01" & 
                                    group.day$variable %in% pollutants, ], 
                   aes(x = by.day, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line() + 
                facet_wrap(~code, ncol=1)

COVID <- ggplot(data = group.day[group.day$by.day > "2020-03-01" & 
                                    group.day$variable %in% pollutants, ],
                aes(x = by.day, y = value, color=variable)
               ) +
               geom_line() +
               geom_line() +
               facet_wrap(~code, ncol=1)
    
grid.arrange(preCOVID, COVID, nrow = 1)

Resolucion de 1 mes

In [11]:
dataAQV$by.month <- round_date(ymd_hms(dataAQV$date), unit="month")
In [12]:
group.mth <- aggregate(value ~ by.month + code + variable, dataAQV, median)

preCOVID <- ggplot(data = group.mth[group.mth$by.month < "2020-03-01" & 
                                    group.mth$variable %in% pollutants, ], 
                   aes(x = by.month, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line() + 
                facet_wrap(~code, ncol=1)

COVID <- ggplot(data = group.mth[group.mth$by.month > "2020-03-01" & 
                                 group.mth$variable %in% pollutants, ],
                aes(x = by.month, y = value, color=variable)
               ) +
               geom_line() +
               geom_line() +
               facet_wrap(~code, ncol=1)
    
grid.arrange(preCOVID, COVID, nrow = 1)

Ciclo Anual con Resolucion Semanal

In [13]:
dataAQV$annual.week <- round_date(ymd_hms(dataAQV$date), unit="weeks")
In [14]:
group.week <- aggregate(value  ~ annual.week + code + variable, dataAQV, median)

preCOVID <- ggplot(data = group.week[group.week$annual.week < "2020-03-01" & 
                                     group.week$variable %in% pollutants, ], 
                   aes(x = annual.week, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line() + 
                facet_wrap(~code, ncol=1)

COVID <- ggplot(data = group.week[group.week$annual.week > "2020-03-01" &
                                  group.week$variable %in% pollutants, ],
                aes(x = annual.week, y = value, color=variable)
               ) +
               geom_line() +
               geom_line() +
               facet_wrap(~code, ncol=1)
    
grid.arrange(preCOVID, COVID, nrow = 1)

Ajuste anual

In [15]:
data.lm <- group.week[group.week$code == "es1563a" 
                   & group.week$variable == "o3",][ c("value", "annual.week")]
data.lm["month"] <- seq.int(nrow(data.lm))
head(data.lm)

lm.fit <- lm(value ~ sin(2*pi/(1*52.1429)*month)+cos(2*pi/(1*52.1429)*month), 
                data=data.lm)

ggplot() + 
        geom_point(data=data.lm, aes(x=annual.week, y=value), color="red") +
        geom_line(aes(x=data.lm$annual.week, y=lm.fit$fitted.values), color="blue")
A data.frame: 6 × 3
valueannual.weekmonth
<dbl><dttm><int>
6371 3.02015-01-041
6372 2.02015-01-112
637321.02015-01-183
637426.52015-01-254
637549.02015-02-015
637638.02015-02-086
In [16]:
data.lm$jaime <- data.lm$value - lm.fit$fitted.values
#data.lm$jaime <- data.lm$jaime - min(data.lm$jaime)

ggplot() + 
        geom_line(data=data.lm, aes(x=annual.week, y=scale(jaime)), color="red")
In [17]:
b <- function(i) {

    reslm <- lm(value ~ sin(2*pi/50*annual.week)+cos(2*pi/50*annual.week), 
                data=group.week.1[group.week.1$variable == i[1] 
                                  & group.week.1$code == i[2]
                                  ,]
               )
    
    ggplot(data=group.week.1[group.week.1$variable == i[1] 
                             & group.week.1$code == i[2]
                             ,]) + 
        geom_point(aes(x=annual.week, y=value, color=variable)) +
        geom_line(aes(x=annual.week, y=reslm$fitted.values, color=variable)) +
        facet_wrap(~code, ncol=1)
}

pairs <- mapply(c, pollutants, mapply(rep, levels(group.week.1$code), length(levels(group.week.1$code))), SIMPLIFY = T)
ploted <- lapply(pairs, b) 

grid.arrange(grobs=ploted, nrow = 4)
Error in levels(group.week.1$code): object 'group.week.1' not found
Traceback:

1. mapply(c, pollutants, mapply(rep, levels(group.week.1$code), 
 .     length(levels(group.week.1$code))), SIMPLIFY = T)
2. mapply(rep, levels(group.week.1$code), length(levels(group.week.1$code)))
3. levels(group.week.1$code)

Agrupar las Estaciones

In [18]:
group.est <- aggregate(value ~ code + variable + by.day, group.day, mean)

preCOVID <- ggplot(data = group.est[group.est$by.day < "2020-03-01" & 
                                    group.est$variable %in% pollutants, ], 
                   aes(x = by.day, y = value, color=variable)
                  ) +
                geom_line() +
                geom_line()

COVID <- ggplot(data = group.est[group.est$by.day > "2020-03-01" 
                                 & group.est$variable %in% pollutants
                                 , ],
                aes(x = by.day, y = value, color=variable)
               ) +
               geom_line() +
               geom_line()
    
grid.arrange(preCOVID, COVID, nrow = 1)

Guardar Datos en xlsx

In [ ]:
write.csv(dataAQV, "../xlsx/dataAQV.csv")